# Source code for nlp_architect.solutions.set_expansion.ui.main

# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************

import socket
import pickle
import logging
import sys
import re
from os.path import dirname, join

from bokeh.layouts import column, layout
from bokeh.models import ColumnDataSource, Div, Row, CustomJS
from bokeh.models.widgets import Button, DataTable, TableColumn, CheckboxGroup, MultiSelect
from bokeh.models.widgets.inputs import TextInput
from bokeh.io import curdoc

import nlp_architect.solutions.set_expansion.ui.settings as settings

# pylint: skip-file
logger = logging.getLogger(__name__)

# Module-level state shared by the callbacks below.

# Vocabulary received from the server by get_vocab(); None until it has been fetched.
vocab = None
# Maps each full vocabulary phrase to the form shown in the list
# (phrases of max_phrase_length chars or more are cut and suffixed with '...').
vocab_dict = {}
# Reverse mapping: displayed (possibly cut) phrase -> full phrase.
cut_vocab_dict = {}
# Upper bound on the number of options put into phrases_list.
max_visible_phrases = 5000
# Status messages shown in the "working" labels.
working_text = 'please wait...'
fetching_text = 'Fetching vocabulary from server (one time only), this can take few minutes...'
# NOTE(review): only named in a `global` statement in the visible code, never assigned.
seed_check_text = ''
# Full phrases currently selected in either the expand table or the vocabulary list.
all_selected_phrases = []
# True while search_callback is running, so that vocab_phrase_selected_callback
# ignores the selection changes the search itself makes.
search_flag = False
# Phrases at least this long are displayed in their cut form.
max_phrase_length = 40
# True while clear_seed_callback is running, so that the selection callbacks
# skip their sync logic.
clear_flag = False
# Columns of the expand results table; fields match the keys of empty_table.
expand_columns = [
    TableColumn(field="res", title="Results"),
    TableColumn(field="score", title="Score")
]
# Placeholder table content: 15 blank rows.
empty_table = {'res': 15 * [''], 'score': 15 * ['']}
# Label of the second checkbox; depends on whether grouping is enabled in settings.
checkbox_label = "Show extracted term groups" if settings.grouping else "Show extracted phrases"

# create ui components

# --- seed input and annotation widgets ---
seed_input_title = 'Please enter a comma separated seed list of terms:'
seed_input_box = TextInput(
    title=seed_input_title, value="", width=450, css_classes=["seed-input"])
annotation_input = TextInput(title="Please enter text to annotate:", value="", width=400,
                             height=80, css_classes=["annotation-input"])
annotation_output = Div(text='', height=30, width=500, style={'padding-left': '35px'})
annotate_button = Button(label="Annotate", button_type="success", width=150,
                         css_classes=["annotation-button"])
# Shows the group of the selected vocabulary phrase when settings.grouping is on.
group_info_box = Div(text='', height=30, css_classes=["group-div"])
search_input_box = TextInput(title="Search:", value="", width=300)
# --- action buttons ---
expand_button = Button(label="Expand", button_type="success", width=150,
                       css_classes=["expand-button"])
clear_seed_button = Button(
    label="Clear", button_type="success", css_classes=['clear_button'], width=50)
export_button = Button(
    label="Export", button_type="success", css_classes=['export_button'], width=100)
# --- expand results table (starts with the blank placeholder rows) ---
expand_table_source = ColumnDataSource(data=empty_table)
expand_table = DataTable(
    source=expand_table_source, columns=expand_columns, width=500, css_classes=['expand_table'])
# --- vocabulary list; options are filled in by checkbox_callback / search_callback ---
phrases_list = MultiSelect(
    title="", value=[], options=[], width=300, size=27, css_classes=['phrases_list'])
# Index 0 toggles the annotation area, index 1 toggles the vocabulary list
# (see checkbox_callback).
checkbox_group = CheckboxGroup(
    labels=["Text annotation", checkbox_label], active=[], width=400,
    css_classes=['checkbox_group'])
# NOTE(review): annotate_checkbox and search_box_area are created here but are not
# placed in any layout in the visible code.
annotate_checkbox = CheckboxGroup(
    labels=["Text annotation"], active=[], width=400, css_classes=['annotate_checkbox'])
search_box_area = column(children=[Div(height=10, width=200)])
# --- status labels ---
working_label = Div(
    text="", style={'color': 'blue', 'font-size': '15px'})
search_working_label = Div(
    text="", style={'color': 'blue', 'padding-bottom': '0px', 'font-size': '15px'})
# Warning about seed words missing from the vocabulary; inserted above the table
# by get_expand_results_callback when needed.
seed_check_label = Div(
    text='', style={'font-size': '15px'}, height=20, width=500)
# --- layout containers; their `children` are swapped by the callbacks ---
table_layout = Row(
    expand_table)
table_area = column(children=[table_layout])
seed_layout = column(Row(seed_input_box, column(Div(height=14, width=0), clear_seed_button)),
                     expand_button, table_area)
# Empty until the "Text annotation" checkbox is ticked.
annotation_layout = column(children=[])

phrases_area = column(children=[search_working_label, Div(width=300)])
checkbox_layout = column(children=[checkbox_group, phrases_area])
# Page grid: title row, main row (checkboxes/vocabulary | seed+table | annotation),
# footer row (group info and export button).
grid = layout(
    [
        [working_label, Div(width=250), Div(text="<h1>Set Expansion Demo</h1>")],
        [checkbox_layout, seed_layout, Div(width=50),
         column(Div(height=0, width=0), annotation_layout)],
        [group_info_box, Div(width=500), export_button]
    ]
)


# define callbacks

def get_vocab():
    """
    Get vocabulary of the np2vec model from the server

    Stores the received vocabulary in the module-level ``vocab`` and fills the
    two lookup tables: ``vocab_dict`` (full phrase -> displayed phrase) and
    ``cut_vocab_dict`` (displayed phrase -> full phrase). Phrases whose length
    is ``max_phrase_length`` or more are displayed cut, followed by '...'.

    If the server returns no data, ``vocab`` and both tables are left
    untouched, so a later call can retry the fetch.
    """
    global vocab
    logger.info('sending get_vocab request to server...')
    received = send_request_to_server(['get_vocab'])
    if received is None:
        # send_request_to_server returns None when it could not unpickle a
        # reply; iterating over None would raise TypeError.
        logger.warning('no vocabulary received from server')
        return
    vocab = received
    for phrase in vocab:
        if len(phrase) < max_phrase_length:
            shown = phrase
        else:
            shown = phrase[:max_phrase_length - 1] + '...'
        vocab_dict[phrase] = shown
        cut_vocab_dict[shown] = phrase
def send_request_to_server(request):
    """
    Send a pickled request to the expand server and return the unpickled reply.

    Opens a new TCP connection to ``settings.expand_host``:``settings.expand_port``,
    sends ``pickle.dumps(request)``, then reads until the server closes the
    connection.

    Args:
        request: picklable object; callers in this module pass a list whose
            first item is the command name (e.g. ``['get_vocab']``).

    Returns:
        The object unpickled from the reply, or None when unpickling raises
        EOFError (e.g. the server closed the connection without sending data).

    Raises:
        OSError: connection or socket errors are not handled here.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        # Connect to server and send data
        sock.connect((settings.expand_host, settings.expand_port))
        logger.info('sending request')
        sock.sendall(pickle.dumps(request))
        # Receive data from the server until it closes the connection.
        # Chunks are collected and joined once to avoid re-copying the whole
        # buffer on every packet.
        chunks = []
        ctr = 0
        while True:
            packet = sock.recv(134217728)
            logger.info("%s. received: %s", str(ctr), str(len(packet)))
            ctr += 1
            if not packet:
                break
            chunks.append(packet)
    logger.info('got response, uncompressing')
    try:
        # NOTE(security): pickle.loads can execute arbitrary code; this is
        # only safe when the expand server is trusted.
        return pickle.loads(b"".join(chunks))
    except EOFError:
        logger.info('No data received')
        return None
def row_selected_callback(indices, old, new):
    """
    Sync the vocabulary list selection after the expand-table selection changed.

    Registered on the 'indices' property of ``expand_table_source.selected``.
    Does nothing while ``clear_flag`` is set or while the table still holds the
    empty placeholder data.

    Args:
        indices: first callback argument (presumably the changed attribute
            name, per Bokeh's on_change convention); not used here.
        old: row indices that were selected before the change.
        new: row indices that are selected after the change.
    """
    logger.info('row selected callback')
    global clear_flag, all_selected_phrases
    if not clear_flag and expand_table_source.data != empty_table:
        logger.info('row selected callback. old indices=%s. new indices=%s', str(old), str(new))
        # sync phrases lists:
        old_phrases = [expand_table_source.data['res'][p] for p in old]
        new_phrases = [expand_table_source.data['res'][p] for p in new]
        logger.info('selected_expand was updated: old=%s ,new=%s', str(
            old_phrases), str(new_phrases))
        # phrase was de-selected from expand list:
        # (only the first such phrase is handled; the vocabulary list is only
        # touched once the vocabulary has been fetched)
        for o in old_phrases:
            if o not in new_phrases and \
                    (vocab is not None and vocab_dict[o] in phrases_list.value):
                logger.info('removing %s from vocab selected', o)
                phrases_list.value.remove(vocab_dict[o])
                break
        # new phrase was selected from expand list:
        # (mirrored into the vocabulary list only if it is one of the
        # currently visible options and not selected there yet)
        for n in new_phrases:
            if n not in old_phrases and \
                    (vocab is not None and vocab_dict[n] in phrases_list.options and
                     vocab_dict[n] not in phrases_list.value):
                phrases_list.value.append(vocab_dict[n])
                break
        # recompute the combined selection and reflect it in the seed box
        update_all_selected_phrases()
        seed_input_box.value = get_selected_phrases_for_seed()
def update_all_selected_phrases():
    """
    Sync selected values from both the expand-table and the vocabulary list

    Rebuilds the module-level ``all_selected_phrases`` (full phrases) in three
    steps: drop phrases that are visible in one of the widgets but no longer
    selected there, then add the table selection, then add the vocabulary-list
    selection (translated back to full phrases via ``cut_vocab_dict``).
    """
    logger.info('update selected phrases')
    global all_selected_phrases
    # work on a copy so the list being iterated below is not modified
    updated_selected_phrases = all_selected_phrases[:]
    # selected table rows, ignoring blank rows
    selected_expand = [expand_table_source.data['res'][i] for
                       i in expand_table_source.selected.indices if
                       expand_table_source.data['res'][i] != '']
    selected_vocab = phrases_list.value
    logger.info('selected expand= %s', str(selected_expand))
    logger.info('selected vocab= %s', str(selected_vocab))
    logger.info('current all_selected_phrases= %s', str(all_selected_phrases))
    for x in all_selected_phrases:
        logger.info('x= %s', x)
        # remove x if it is present in the table but not selected there, or
        # (once the vocabulary is loaded) visible in the list but not selected
        if (x in expand_table_source.data['res'] and x not in selected_expand) or (
                vocab is not None and (vocab_dict[x] in phrases_list.options) and (
                vocab_dict[x] not in selected_vocab)
        ):
            logger.info('removing %s', x)
            updated_selected_phrases.remove(x)
    for e in selected_expand:
        if e not in updated_selected_phrases:
            logger.info('adding %s', e)
            updated_selected_phrases.append(e)
    for v in selected_vocab:
        # list values are displayed (possibly cut) phrases; store the full one
        full_v = cut_vocab_dict[v]
        if full_v not in updated_selected_phrases:
            logger.info('adding %s', full_v)
            updated_selected_phrases.append(full_v)
    all_selected_phrases = updated_selected_phrases[:]
    logger.info('all_selected_phrases list was updated: %s', str(all_selected_phrases))
def checkbox_callback(checked_value):
    """
    Show or hide the annotation area and the vocabulary list.

    Args:
        checked_value: indices of the ticked boxes of ``checkbox_group``;
            0 toggles the annotation widgets, 1 toggles the vocabulary list.
    """
    show_annotation = 0 in checked_value
    show_phrases = 1 in checked_value

    group_info_box.text = ''

    if show_annotation:
        annotation_layout.children = [annotation_input, annotate_button, annotation_output]
    else:
        annotation_layout.children = []
        annotation_output.text = ""

    if not show_phrases:
        # hide the vocabulary list and any group info that was displayed
        phrases_area.children = []
        group_info_box.text = ""
        return

    # the vocabulary is fetched from the server only once
    if vocab is None:
        working_label.text = fetching_text
        get_vocab()
    if not phrases_list.options:
        working_label.text = working_text
        # show the cut representation
        visible = list(cut_vocab_dict.keys())[0:max_visible_phrases]
        phrases_list.options = visible
    phrases_area.children = [search_input_box, search_working_label, phrases_list]
    working_label.text = ''
def get_expand_results_callback():
    """
    Send to the server the seed to expand and set the results in the expand table.

    Reads the comma separated seed from ``seed_input_box``. An empty seed
    resets the table to the blank placeholder. Otherwise every seed word is
    checked with an 'in_vocab' request; words the server reports as missing
    are listed in ``seed_check_label`` above the table. The whole seed string
    is then sent in an 'expand' request and the returned (phrase, score) pairs
    are written to ``expand_table_source``.

    Any exception is logged and swallowed so the UI callback never raises;
    the "working" label is always cleared at the end.
    """
    # local import: only needed to escape user text placed into Div markup
    from html import escape

    logger.info('### new expand request')
    working_label.text = working_text
    try:
        seed_check_label.text = ''
        table_area.children = [table_layout]
        seed = seed_input_box.value
        logger.info('input seed: %s', seed)
        if seed == '':
            expand_table_source.data = empty_table
            return
        seed_words = [x.strip() for x in seed.split(',')]
        # the server answers False for words that are not in the vocabulary
        bad_words = [w for w in seed_words
                     if send_request_to_server(['in_vocab', w]) is False]
        if bad_words:
            # Div.text is rendered as HTML, so user-typed words are escaped
            quoted = ','.join("'" + escape(w) + "'" for w in bad_words)
            seed_check_label.text = 'the words: <span class="bad-word">' \
                                    + quoted \
                                    + '</span> are not in the vocabulary and will be ignored'
            logger.info('setting table area')
            table_area.children = [seed_check_label, table_layout]
        logger.info('sending expand request to server with seed= %s', seed)
        received = send_request_to_server(['expand', seed])
        if received is not None:
            res = [x[0] for x in received]
            scores = ["{0:.5f}".format(y[1]) for y in received]
            logger.info('setting table data')
            expand_table_source.data = {
                'res': res,
                'score': scores
            }
        else:
            logger.info('Nothing received from server')
    except Exception as e:
        # top-level UI boundary: keep the app alive but record the traceback
        logger.exception('Exception: %s', e)
    finally:
        working_label.text = ''
def search_callback(value, old, new):
    """
    Filter the vocabulary list by the prefix typed into the search box.

    An empty search shows the first ``max_visible_phrases`` displayed phrases
    in vocabulary order; otherwise the first ``max_visible_phrases`` phrases
    whose full form starts with the search text (case-insensitive) are shown,
    sorted. Phrases in ``all_selected_phrases`` that are among the new options
    are re-selected.

    ``search_flag`` is set for the duration of the call so that
    ``vocab_phrase_selected_callback`` ignores the selection changes made
    here; it is reset in a ``finally`` so an error cannot leave it stuck.

    Args:
        value: first callback argument (presumably the changed attribute
            name); not used.
        old: previous search text; not used.
        new: current search text.
    """
    global search_flag
    group_info_box.text = ''
    search_working_label.text = working_text
    logger.info('search vocab')
    search_flag = True
    try:
        phrases_list.value = []
        if new == '':
            new_phrases = list(cut_vocab_dict.keys())
        else:
            prefix = new.lower()
            seen = set()  # O(1) duplicate check for displayed phrases
            new_phrases = []
            for phrase in vocab:
                if not phrase.lower().startswith(prefix):
                    continue
                shown = vocab_dict[phrase]
                if shown not in seen:
                    seen.add(shown)
                    new_phrases.append(shown)
        # cut to the visible limit first, then sort only real search results
        options = new_phrases[0:max_visible_phrases]
        if new != '':
            options.sort()
        phrases_list.options = options
        visible = set(options)
        phrases_list.value = [
            vocab_dict[x] for x in all_selected_phrases if vocab_dict[x] in visible]
        logger.info('selected vocab after search= %s', str(phrases_list.value))
    finally:
        search_working_label.text = ''
        search_flag = False
def vocab_phrase_selected_callback(attr, old_selected, new_selected):
    """
    Sync the expand-table selection after the vocabulary list selection changed.

    Registered on the 'value' property of ``phrases_list``. When grouping is
    enabled and exactly one phrase is selected, its group is requested from
    the server and shown in ``group_info_box``. The sync itself is skipped
    while ``clear_flag`` or ``search_flag`` is set.

    Args:
        attr: first callback argument (presumably the changed attribute
            name); not used.
        old_selected: displayed phrases selected before the change.
        new_selected: displayed phrases selected after the change.
    """
    logger.info('vocab selected')
    if settings.grouping:
        # show group info
        if len(new_selected) == 1:
            res = send_request_to_server(['get_group', new_selected[0]])
            if res is not None:
                group_info_box.text = str(res)
    global clear_flag
    if not clear_flag:
        global all_selected_phrases, search_flag
        # selection changes made by search_callback itself are ignored
        if (search_flag):
            return
        logger.info('selected_vocab was updated: old= %s, new= %s', str(
            old_selected), str(new_selected))
        # sync expand table:
        # phrase was de-selected from vocab list:
        expand_selected = [expand_table_source.data['res'][p] for p in
                           expand_table_source.selected.indices]
        for o in old_selected:
            # list values are displayed phrases; the table holds full phrases
            full_o = cut_vocab_dict[o]
            if o not in new_selected and full_o in expand_selected:
                logger.info('%s removed from vocab selected and exists in expand selected',
                            full_o)
                logger.info('removing %s from expand selected indices. index=%s', full_o,
                            str(expand_table_source.data['res'].index(full_o)))
                logger.info('current expand indices: %s',
                            str(expand_table_source.selected.indices))
                expand_table_source.selected.indices.remove(
                    expand_table_source.data['res'].index(full_o))
                logger.info('new expand indices: %s',
                            str(expand_table_source.selected.indices))
                break
        # new phrase was selected from vocab list:
        for n in new_selected:
            full_n = cut_vocab_dict[n]
            logger.info('selected phrase=' + n + ', full phrase=' + full_n)
            if n not in old_selected and full_n in \
                    expand_table_source.data['res'] and full_n not in expand_selected:
                expand_table_source.selected.indices.append(
                    expand_table_source.data['res'].index(full_n))
                break
        # recompute the combined selection and reflect it in the seed box
        update_all_selected_phrases()
        seed_input_box.value = get_selected_phrases_for_seed()
def clear_seed_callback():
    """
    Reset the seed box, the seed warning and every selection.

    ``clear_flag`` is raised while the widgets are being reset so that the
    selection callbacks skip their sync logic. It is lowered in a ``finally``
    so an error during the reset cannot leave selection syncing disabled.
    """
    global all_selected_phrases, clear_flag
    logger.info('clear')
    clear_flag = True
    try:
        seed_input_box.value = ''
        seed_check_label.text = ''
        expand_table_source.selected.indices = []
        phrases_list.value = []
        all_selected_phrases = []
        # drop the seed warning label from the table area, if it was shown
        table_area.children = [table_layout]
    finally:
        clear_flag = False
def get_selected_phrases_for_seed():
    """
    create the seed string to send to the server

    Returns:
        The phrases of ``all_selected_phrases`` joined with ', ' (an empty
        string when nothing is selected).
    """
    return ', '.join(all_selected_phrases)
def expand_data_changed_callback(data, old, new):
    """
    Clear the table selection when the data being replaced was the empty table.

    Args:
        data: first callback argument (presumably the changed attribute
            name); not used.
        old: table data before the change.
        new: table data after the change; not used.
    """
    previous_was_placeholder = old == empty_table
    if previous_was_placeholder:
        expand_table_source.selected.indices = []
def annotate_callback():
    """
    Annotate the user's text with the phrases the server relates to the seed.

    Sends an 'annotate' request with the stripped seed terms and the text,
    then wraps every whole-word occurrence of each returned phrase in the
    markup produced by ``mark_phrase_tag`` and shows the result in
    ``annotation_output``. Shows a fixed message when there is no seed or the
    server returns an empty result.

    Any exception (including a missing server reply, which makes ``len(res)``
    fail) is logged and reported to the user as a generic error message.
    """
    try:
        annotation_output.text = working_text
        user_text = annotation_input.value
        if len(seed_input_box.value) == 0:
            out_text = "No seed to compare to"
        else:
            out_text = user_text
            seed = [x.strip() for x in seed_input_box.value.split(',')]
            # assumes the server replies with a collection of phrase strings
            res = send_request_to_server(['annotate', seed, user_text])
            logger.info("res:%s", str(res))
            if len(res) == 0:
                out_text = "No results found"
            for phrase in res:
                # The phrase is data, not a pattern: escape regex metacharacters.
                # The lookarounds equal \b for phrases that start and end with a
                # word character, and also work for phrases such as "c++".
                pattern = re.compile(r'(?<!\w)' + re.escape(phrase) + r'(?!\w)')
                tagged = mark_phrase_tag(phrase)
                # a callable replacement is inserted literally, so backslashes
                # in the phrase are not parsed as escapes or group references
                out_text = pattern.sub(lambda _match, tagged=tagged: tagged, out_text)
        annotation_output.text = out_text
    except Exception as e:
        annotation_output.text = "An error occured"
        logger.exception("Error: %s", e)
def mark_phrase_tag(text):
    """
    Wrap the given text in an opening and a closing ``phrase`` tag.

    Args:
        text (str): the phrase to mark.

    Returns:
        str: ``text`` enclosed in ``<phrase>`` ... ``</phrase>``.
    """
    opening, closing = '<phrase>', '</phrase>'
    return opening + text + closing
# set callbacks expand_button.on_click(get_expand_results_callback) expand_table_source.selected.on_change('indices', row_selected_callback) expand_table_source.on_change('data', expand_data_changed_callback) checkbox_group.on_click(checkbox_callback) search_input_box.on_change('value', search_callback) phrases_list.on_change('value', vocab_phrase_selected_callback) clear_seed_button.on_click(clear_seed_callback) with open(join(dirname(__file__), "download.js")) as f: code = f.read() export_button.callback = CustomJS(args=dict(source=expand_table_source), code=code) annotate_button.on_click(annotate_callback) # table_area.on_change('children', table_area_change_callback) # arrange components in page doc = curdoc() main_title = "Set Expansion Demo" doc.title = main_title doc.add_root(grid)